#######################################################
#Code for Replicating Rule Significance Estimation
# by Fang-Yi Chiou and Jonathan Klingler
# Aug 15, 2022
###################################################

library (runjags)
library (coda)
library (reshape2)
library (random)
library (scales)
library (ggplot2)


#Load the rater data
rm(list=ls())
data<- read.table("Desktop/raters_30March22.txt")

#Prepare for the data file used in Jags code
y.count<- cbind(as.numeric(data$Major), as.numeric(data$Priority), as.numeric(data$Regulatory), as.numeric(data$StDeadline),as.numeric(data$JuDeadline),    as.numeric(data$NYT),  as.numeric(data$Hearing), as.numeric(data$ANPRM_count), as.numeric(data$NPRM_count), as.numeric(data$PublicComment),   as.numeric(data$Page),as.numeric(data$lobby), as.numeric(data$Hein_RIN), as.numeric(data$WP_O), as.numeric(data$WP_F))
Cvar<- cbind(as.numeric(data$Firstyear), as.numeric(data$UAissue), as.numeric(data$UAissue.1))
 
#Jags code for estimating rule significance
jags.PNBIRT<- "model{
## Likelihood
for(i in 1:N){
  ##Dichotomous raters
for (j in 1:(M-M1)){
y[i,j] ~ dbern(pi[i,j])
probit(pi[i,j]) <- beta[j]*X[i] + alpha[j]
}
  ##Count raters without control variables
for(j in (M-M1+1):(M-3)){
y[i,j] ~ dnegbin(p[i, j], r[j-M+M1])
p[i, j] <- r[j-M+M1]/(r[j-M+M1]+lambda[i, j])
log(lambda[i,j]) <- mu[i,j]
mu[i,j]<- alpha[j]+beta[j]*X[i]
}
  ##Count raters with control variables
for(j in (M-2):(M-2)){
y[i,j] ~ dnegbin(p[i, j], r[j-M+M1])
p[i, j] <- r[j-M+M1]/(r[j-M+M1]+lambda[i, j])
log(lambda[i,j]) <- mu[i,j]
mu[i,j]<- alpha[j]+beta[j]*X[i]+delta[1]*log(Z[i,1])
}

for(j in (M-1):(M-1)){
y[i,j] ~ dnegbin(p[i, j], r[j-M+M1])
p[i, j] <- r[j-M+M1]/(r[j-M+M1]+lambda[i, j])
log(lambda[i,j]) <- mu[i,j]
mu[i,j]<- alpha[j]+beta[j]*X[i]+delta[2]*log(Z[i,2])
}

for(j in M:M){
y[i,j] ~ dnegbin(p[i, j], r[j-M+M1])
p[i, j] <- r[j-M+M1]/(r[j-M+M1]+lambda[i, j])
log(lambda[i,j]) <- mu[i,j]
mu[i,j]<- alpha[j]+beta[j]*X[i]+delta[3]*log(Z[i,3])
}
}

## Priors for item parameters
beta[1] ~ dnorm(0, 0.25)I(0,) # items constrained to identify
beta[2] ~ dnorm(0, 0.25)I(0,) # items constrained to identify
alpha[1] ~ dnorm(0, 1) 
alpha[2]~ dnorm(0, 1)
for(j in 3:M){
alpha[j] ~ dnorm(0,1) # uniltivariate Normal prior
beta[j] ~ dnorm(0,1) # uniltivariate Normal prior
}

## Priors for rule significance
for(i in 1:N){
X[i] ~ dnorm(0,1) # uniltivariate Normal prior
}

## Priors for the coefficients of control variables
for(i in 1:3){
delta[i] ~ dnorm(0,1) # uniltivariate Normal prior
}

## Priors for dispersion paramesters
for(j in 1:M1){
r[j] ~ dunif(0,50)
}
}"

##Supplying the data
PNBIRT.data <- dump.format(list(y=y.count
                                , N=nrow(y.count)
                                , M=ncol(y.count)
                                ,M1=8
                                ,Z=Cvar))

##Specify parameters
PNBIRT.parameters = c("X", "alpha", "beta", "delta", "r", "deviance")

 
set.seed(98002)
PNBIRT.inits.1 <- function() {
  dump.format(
    list(
       X = rnorm(nrow(y.count))
      , alpha = rnorm(ncol(y.count))
      , beta = c(2, 2, rnorm(ncol(y.count)-2))
      ,delta= rnorm(3)
      ,r=rep(1,8)
#      ,.RNG.name="base::Super-Duper"
## The results used in this project were generated using the default random number generator option
      ,.RNG.seed=98002
    )
  )
}

set.seed(50388)
PNBIRT.inits.2 <- function() {
  dump.format(
    list(
      X = rnorm(nrow(y.count))
      , alpha = rnorm(ncol(y.count))
      , beta = c(2, 2, rnorm(ncol(y.count)-2))
      ,delta= rnorm(3)
      ,r=rep(1,8)
#      ,.RNG.name="base::Wichmann-Hill"
## The results used in this project were generated using the default random number generator option
      ,.RNG.seed=50388
    )
  )
}

cleanup.jags()
set.seed(50388)
 ##Simulate parameters
PNBIRT.model <- run.jags(
  model=jags.PNBIRT,
  monitor=PNBIRT.parameters,
  method="parallel",
  n.chains=2,
  data=PNBIRT.data,
  inits=list (PNBIRT.inits.1(), PNBIRT.inits.2()),
  thin=30, burnin=3000, sample=200,
check.conv=FALSE, plots=FALSE)

chainsPNBIRT <- mcmc.list(list (PNBIRT.model$mcmc[[1]], PNBIRT.model$mcmc[[2]]))


##Table 1
load("Desktop/MCMC results for the IRT model.Rda")
Beta.e   <- rbind (chainsPNBIRT[[1]][,grep("beta",  colnames (chainsPNBIRT[[1]]))],
                   chainsPNBIRT[[2]][,grep("beta",  colnames (chainsPNBIRT[[2]]))])

Alpha.e   <- rbind (chainsPNBIRT[[1]][,grep("alpha",  colnames (chainsPNBIRT[[1]]))],
                    chainsPNBIRT[[2]][,grep("alpha",  colnames (chainsPNBIRT[[2]]))])

X.e   <- rbind (chainsPNBIRT[[1]][,grep("X",  colnames (chainsPNBIRT[[1]]))],
                chainsPNBIRT[[2]][,grep("X",  colnames (chainsPNBIRT[[2]]))])

Dispersion.e<-rbind (chainsPNBIRT[[1]][,grep("r",  colnames (chainsPNBIRT[[1]]))],
                     chainsPNBIRT[[2]][,grep("r",  colnames (chainsPNBIRT[[2]]))])

Delta.e<- rbind (chainsPNBIRT[[1]][,grep("delta",  colnames (chainsPNBIRT[[1]]))],
                 chainsPNBIRT[[2]][,grep("delta",  colnames (chainsPNBIRT[[2]]))])

Deviance<- rbind (chainsPNBIRT[[1]][,grep("deviance",  colnames (chainsPNBIRT[[1]]))],
                  chainsPNBIRT[[2]][,grep("deviance",  colnames (chainsPNBIRT[[2]]))])


Beta<- round(cbind(apply(Beta.e, 2, mean), apply(Beta.e, 2, sd)), 3)
Alpha<- round(cbind(apply(Alpha.e, 2, mean), apply(Alpha.e, 2, sd)),3)
Item<- cbind(Beta, Alpha)
rownames(Item)<- c("Major", "Priority", "Regulatory", "StDeadline","JuDeadline",    "NYT",  "Hearing", "ANPRM_count", "NPRM_count", "PublicComment",   "Page","lobby", "Hein_RIN", "WP_O", "WP_F")
colnames(Item)<- c("Discrimination", "Standard errors", "Difficulty", "Standard error")
Item


##Rule significance (Figure 1)
rule<- read.table("Desktop/supplemental.txt")
x.jags<- rule$SIG 
plot(density(x.jags)
     , xlab="Significance scores", ylab="Density"
     , main=""
     #, main="Figure 1. Significance distribution of rules, 1993-2019"
)

#Figure 2
#Merge Supplemental Data with Rater Data
data$RIN<-row.names(data)
datanew<- read.table("Desktop/supplemental.txt")
datafigure2<-merge(datanew,data,by="RIN")

#Plot Figure 2
ggplot(datanew, aes(x=reorder(DEPT_ABB, SIG, FUN=median), y=SIG, fill=independent)) + geom_boxplot(outlier.size=.1) + 
  labs(y ="Distributions of Agency Proposed Rules' Significance Scores", x = "Agency Abbreviations", fill="Agency Structure") + coord_flip() + 
  scale_fill_manual(values = c("grey70", "grey45"))  + theme(text=element_text(size=7),legend.key.height= unit(1, 'cm'),legend.position="bottom")

ggsave(width = 6.48, height = 8.2, dpi = 300, filename = "Figure2.pdf")

##Figure A.1
plot(datafigure2$SIG, datafigure2$Priority
     , xlab="Estimated significance scores", ylab="The rater of Priority"
     #     , main="Figure 2. The correlation between estimated scores and priority rater"
)

plot(datafigure2$SIG, datafigure2$Major,
     , xlab="Estimated significance scores", ylab="The rater of Major"
     #     , main="Figure 2. The correlation between estimated scores and priority rater"
)


##Figure A.2
plot(rule$SIG, rule$impact
     , xlab="Estimated significance scores", ylab="Potter's (2017) impact scores"
     #     , main="Figure 3. The correlation between two measures"
)

#Table A.1 - Compare RINs in 2000 Federal Register with RINs in 2000 Unified Agenda
a1data<-read.csv("tableA1data.csv")
CrossTable(a1data$RIN_IN_FR, a1data$RIN_IN_UA)

#Table A.2 - Compare RINs in 2000 Federal Register with RINs in 1999-2001 Unified Agenda
a2data<-read.csv("tableA2data.csv")
CrossTable(a2data$RIN_IN_FR, a2data$RIN_IN_UA)

#Table A.4 - Rules Universe Component Categories
#Use Merged Rater and Supplemental Datasets to iteratively remove RIN observations meeting defined characteristics to create Table A.4
iter1<-datafigure2
table(iter1$POTTER, useNA="ifany")
#Remove 11022 Observations in Potter (2017) Dataset
iter2<-subset(iter1, POTTER==0|is.na(POTTER))
table(iter2$POTTER, useNA="ifany")
#Remove 980 RINs only appearing in the Spring 1995 issue of the UA for which the digitized UA issues are not available
iter3<-subset(iter2, POTTER==0)
table(iter3$OIRA_AGENCY,useNA="ifany")
#Remove 6489 RINs proposed by agencies not bound by OIRA review (independent)
iter4<-subset(iter3, OIRA_AGENCY==1)
table(iter4$DIRECT_UAFR,useNA="ifany")
#Remove 416 RINs promulgated as a Direct Final Rule without an NPRM
iter5<-subset(iter4, DIRECT_UAFR==0)
table(iter5$INTERIM_UAFR, useNA="ifany")
#Remove 2388 RINs promulgated as an Interim Final Rule without an NPRM
iter6<-subset(iter5, INTERIM_UAFR==0)
table(iter6$NPRM_CITATION, useNA="ifany")
#Remove 14158 RINs for which no NPRM was cited in the Unified Agenda
iter7<-subset(iter6, NPRM_CITATION==1)
table(iter7$PUBLISHED_NPRM_PRE2015, useNA="ifany")
#Remove 1757 RINs which have NPRMs published after 2014
iter8<-subset(iter7, PUBLISHED_NPRM_PRE2015==0)
table(iter8$MERGED_RIN, useNA="ifany")
#Remove 8 RINs which were merged into other RINs at some point
iter9<-subset(iter8, MERGED_RIN==0)
#Final Result is 2093 RINs which were previously unobserved


##Table A.5
ratergo<- rep(0, ncol(y.count))
rater.m<- rep(0, ncol(y.count))
for (i in 1:ncol(y.count)){
  ratergo[i]<- length(subset(y.count[,i], y.count[,i]>=1))
  rater.m[i]<- length(subset(y.count[,i], is.na(y.count[,i])))
}
sum.data<- cbind(apply(y.count, 2,min, na.rm=TRUE), ratergo, apply(y.count, 2,max, na.rm=TRUE), rater.m)
sum.data

